0.1 Initial setup

0.2 Setup of the environment:

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

#devtools::install_github("sdam-au/sdam") # loading SDAM custom package, if not working try devtools::install_github("mplex/cedhar", subdir="pkg/sdam")
#devtools::install_github("mplex/cedhar", subdir="pkg/sdam")
library(tidyverse)
#library(sdam)
library(jsonlite)
library(leaflet)

0.3 Loading data

# Parse the cleaned EDH JSON export and wrap the result in a tibble.
list_json <- jsonlite::fromJSON(txt = "EDH_text_cleaned_2021-01-21.json")
EDH_tibble <- list_json %>%
  as_tibble()
# Folder for exported figures, relative to the working directory.
dir.create("../figures")

Display the first 6 records

head(EDH_tibble)

1 Exploration of ‘People’ in EDH

  1. What are the names of all attributes within the ‘people’ attribute?
  2. How many people are in total in the EDH database?
  3. How many people are there per inscription (average, min, max)?
  4. What is the gender ratio of people on inscriptions? (male, female, NA)
  5. What are the names of unique values in the ‘status’ attribute?
  6. What is the ratio of different statuses, e.g. slave vs freedman
  7. How many inscriptions have ‘Age’ category?
  8. What is the average age of people (years, months, days)
  9. What is the origo of people, what is the origo on funerary inscriptions

When I have the pointers how to get the data out of ‘people’ I will be looking at the following specific cases:

Specific case (funerary inscriptions; attribute ‘type_of_inscription_clean’ == ‘epitaph’):

  1. How many people are on funerary inscriptions (total, average, min, max)?
  2. What is the ratio of genders on funerary inscriptions (male, female, NA)?
  3. What is the age of people on funerary inscriptions (total number of inscriptions with age, average, min, max)?
  4. What is the average age of people on funerary inscriptions by province?

Specific case (gender composition) 1. Ratio of men/women on different types of inscriptions (attribute ‘type_of_inscription_clean’)

EDH_tibble$people[1:2]
## [[1]]
##                     name cognomen  nomen person_id gender praenomen
## 1    Noniae P.f. Optatae   Optata  Nonia         1 female      <NA>
## 2      C. Iulio Artemoni   Artemo Iulius         2   male        C.
## 3 C. Iulius C.f. Optatus  Optatus Iulius         3   male        C.
## 
## [[2]]
##     nomen praenomen person_id age: years cognomen gender             name
## 1 Sextius        C.         1         70    Paris   male C. Sextius Paris
# Expand the nested `people` list-column: one row per person.
EDH_unnested <- unnest(EDH_tibble, people)

1.1 What are names of all attributes within the ‘people’ attribute

# Columns that exist only after unnesting, i.e. the attributes of `people`.
unnested_names <- names(EDH_unnested)
unique(unnested_names[!unnested_names %in% names(EDH_tibble)])
##  [1] "name"        "cognomen"    "nomen"       "person_id"   "gender"     
##  [6] "praenomen"   "age: years"  "tribus"      "status"      "occupation" 
## [11] "origo"       "age: days"   "age: months" "supernomen"  "age: hours"

1.2 How many people are in total in the EDH database?

One way through gender

# Tally people by gender, then add the tallies up to get the overall total.
gender <- count(EDH_unnested, gender, sort = TRUE)

sum(gender[["n"]])
## [1] 92427

Second way through nrow

nrow(EDH_unnested)
## [1] 92427

1.3 How many people there are per inscription (average, min, max)

# NOTE(review): this summarises the `person_id` values of all person rows, not
# the number of people per inscription. If `person_id` restarts at 1 within each
# inscription (as the two records printed earlier suggest), the mean below is
# the mean id, not the mean group size — TODO confirm the intended statistic.
summary(as.numeric(EDH_unnested$person_id))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   4.233   3.000 244.000

2 Gender

2.1 What is the gender ratio of people on inscriptions? (male, female, NA)

# Number of people per gender value (including NA), most frequent first.
count(EDH_unnested, gender, sort = TRUE)

3 Status

3.1 What are the names of unique values in the ‘status’ attribute?

# Distinct values of the `status` attribute, in order of first appearance.
unique(EDH_unnested$status)
##  [1] NA                                                                                              
##  [2] "senatorial order"                                                                              
##  [3] "slaves"                                                                                        
##  [4] "freedmen / freedwomen"                                                                         
##  [5] "freedmen / freedwomen?"                                                                        
##  [6] "slaves?"                                                                                       
##  [7] "senatorial order?"                                                                             
##  [8] "decurial order, higher local offices"                                                          
##  [9] "military personnel"                                                                            
## [10] "equestrian order"                                                                              
## [11] "decurial order, higher local offices?"                                                         
## [12] "equestrian order?"                                                                             
## [13] "military personnel?"                                                                           
## [14] "Augustales"                                                                                    
## [15] "emperor / imperial household?"                                                                 
## [16] "Augustales; freedmen / freedwomen"                                                             
## [17] "equestrian order?; decurial order, higher local offices"                                       
## [18] "decurial order, higher local offices; freedmen / freedwomen"                                   
## [19] "equestrian order; decurial order, higher local offices"                                        
## [20] "lower local offices, administration of imperial estates"                                       
## [21] "equestrian order; freedmen / freedwomen"                                                       
## [22] "decurial order, higher local offices; military personnel"                                      
## [23] "Augustales?"                                                                                   
## [24] "equestrian order; military personnel"                                                          
## [25] "rulers (foreign)"                                                                              
## [26] "senatorial order; equestrian order"                                                            
## [27] "emperor / imperial household; equestrian order"                                                
## [28] "decurial order, higher local offices; Augustales"                                              
## [29] "lower local offices, administration of imperial estates; freedmen / freedwomen"                
## [30] "senatorial order; decurial order, higher local offices"                                        
## [31] "equestrian order?; decurial order, higher local offices?"                                      
## [32] "Augustales; decurial order, higher local offices"                                              
## [33] "decurial order, higher local offices; military personnel?"                                     
## [34] "lower local offices, administration of imperial estates?"                                      
## [35] "decurial order, higher local offices; equestrian order"                                        
## [36] "decurial order, higher local offices; Augustales?"                                             
## [37] "emperor / imperial household; decurial order, higher local offices"                            
## [38] "decurial order, higher local offices?; lower local offices, administration of imperial estates"
## [39] "freedmen / freedwomen; military personnel"                                                     
## [40] "equestrian order; decurial order, higher local offices; military personnel"                    
## [41] "decurial order, higher local offices; lower local offices, administration of imperial estates" 
## [42] "lower local offices, administration of imperial estates; military personnel"                   
## [43] "decurial order, higher local offices?; military personnel"                                     
## [44] "equestrian order?; military personnel?"                                                        
## [45] "lower local offices, administration of imperial estates; Augustales"                           
## [46] "equestrian order; decurial order, higher local offices?"                                       
## [47] "senatorial order?; equestrian order?"                                                          
## [48] "decurial order, higher local offices?; freedmen / freedwomen"                                  
## [49] "Augustales?; freedmen / freedwomen"                                                            
## [50] "equestrian order?; lower local offices, administration of imperial estates"

3.2 What is the ratio of different statuses, e.g. slave vs freedman

# Split multi-valued status strings (separated by ";") into at most three parts.
status <- str_split_fixed(EDH_unnested$status, ";", n = 3) %>%
  as.data.frame()

# Stack the three parts into one long column, drop empty slots, strip the
# uncertainty marker "?" and a single leading space, then count each category.
status_counts <- status %>%
  cbind(combined = c(status$V1, status$V2, status$V3)) %>%
  filter(combined != "") %>%
  mutate(combined_clean = str_replace_all(string = combined, pattern = "\\?", replacement = "")) %>%
  mutate(combined_clean = str_replace_all(string = combined_clean, pattern = "^ ", replacement = "")) %>%
  count(combined_clean, sort = TRUE)

status_counts

status_counts %>%
  mutate(combined_clean = reorder(combined_clean, n)) %>%
  ggplot(aes(y = combined_clean, x = n, fill = combined_clean)) +
  # geom_col() always plots the values as given; it has no `stat` argument.
  geom_col(width = 0.8) +
  coord_cartesian(xlim = c(0, 10000)) +
  labs(
    x = "Number of instances",
    y = "Status category",
    title = "Overview of status references in the EDH dataset",
    # labs() expects a plain string here, not the labels object made by ggtitle().
    subtitle = paste("n =", nrow(EDH_tibble), "inscriptions")
  ) +
  geom_label(aes(label = n)) +
  # theme_minimal() is a complete theme; a theme_linedraw() placed before it
  # was fully overridden, so only theme_minimal() is kept.
  theme_minimal()

# NOTE(review): figures are written to "../EDH_people/figures/" here, while the
# setup creates "../figures" — confirm that this target folder exists.
ggsave("../EDH_people/figures/Status_overview.png", width = 12, height = 8)

4 Age

4.1 How many inscriptions have ‘Age’ category?

# Keep the four age columns and only those people for whom at least one of
# them is filled in.
EDH_unnested %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`) %>%
  filter(
    !(is.na(`age: years`) & is.na(`age: months`) &
      is.na(`age: days`) & is.na(`age: hours`))
  )

4.2 What are the unique values for years

unique(EDH_unnested$`age: years`)
##   [1] NA                   "70"                 "42"                
##   [4] "18"                 "8"                  "at least 20"       
##   [7] "35"                 "34"                 "5"                 
##  [10] "10"                 "25"                 "55"                
##  [13] "at least 10"        "2"                  "1"                 
##  [16] "64"                 "53"                 "36"                
##  [19] "29"                 "15"                 "40"                
##  [22] "30"                 "7"                  "75"                
##  [25] "4"                  "100"                "19"                
##  [28] "50"                 "at least 21"        "23"                
##  [31] "26"                 "27"                 "60"                
##  [34] "14"                 "24"                 "90"                
##  [37] "20"                 "43"                 "data not available"
##  [40] "16"                 "37"                 "22"                
##  [43] "3"                  "at least 40"        "45"                
##  [46] "61"                 "at least 50"        "65"                
##  [49] "21"                 "85"                 "6"                 
##  [52] "48"                 "57"                 "67"                
##  [55] "at least 16"        "73"                 "41"                
##  [58] "33"                 "82"                 "52"                
##  [61] "80"                 "12"                 "17"                
##  [64] "38"                 "at least 35"        "44"                
##  [67] "62"                 "68"                 "at least 30"       
##  [70] "9"                  "28"                 "at least 2"        
##  [73] "39"                 "at least 23"        "11"                
##  [76] "at least 17"        "at least 70"        "32"                
##  [79] "13"                 "89"                 "at least 1"        
##  [82] "47"                 "51"                 "93"                
##  [85] "at least 15"        "86"                 "87"                
##  [88] "at least 37"        "72"                 "at least 5"        
##  [91] "at least 12"        "63"                 "at least 3"        
##  [94] "31"                 "at least 8"         "54"                
##  [97] "at least 9"         "at least 26"        "at least 25"       
## [100] "46"                 "81"                 "59"                
## [103] "103"                "at least 60"        "at least 61"       
## [106] "76"                 "84"                 "at least 75"       
## [109] "at least 19"        "at least 22"        "92"                
## [112] "69"                 "at least 110"       "102"               
## [115] "71"                 "56"                 "at least 41"       
## [118] "105"                "83"                 "95"                
## [121] "at least 28"        "at least 6"         "at least 11"       
## [124] "at least 76"        "99"                 "58"                
## [127] "66"                 "78"                 "74"                
## [130] "49"                 "at least 45"        "77"                
## [133] "at least 32"        "at least 24"        "91"                
## [136] "120"                "at least 65"        "at least 31"       
## [139] "at least 7"         "110"                "at least 27"       
## [142] "150"                "at least 51"        "at least 55"       
## [145] "at least 4"         "at least 13"        "at least 14"       
## [148] "104"                "at least 18"        "at least 44"       
## [151] "at least 101"       "at least 74"        "at least 42"       
## [154] "108"                "at least 90"        "125"               
## [157] "at least 77"        "at least 43"        "at least 46"       
## [160] "at least 36"        "at least 80"        "200"

4.3 How many people have their age stated in years

sum(!is.na(EDH_unnested$`age: years`))
## [1] 7993
# People with a value in `age: years`, showing all four age columns.
EDH_unnested %>%
  filter(!is.na(`age: years`)) %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`)

4.4 How many people have their age stated in months

sum(!is.na(EDH_unnested$`age: months`))
## [1] 928
# People with a value in `age: months`, showing all four age columns.
EDH_unnested %>%
  filter(!is.na(`age: months`)) %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`)

4.5 How many people have their age stated in days

sum(!is.na(EDH_unnested$`age: days`))
## [1] 689
# People with a value in `age: days`, showing all four age columns.
EDH_unnested %>%
  filter(!is.na(`age: days`)) %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`)

4.6 How many people have their age stated in hours

sum(!is.na(EDH_unnested$`age: hours`))
## [1] 24
# People with a value in `age: hours`, showing all four age columns.
EDH_unnested %>%
  filter(!is.na(`age: hours`)) %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`)

4.7 What is the average age of people (years, months, days)

Not ideal method as it skips a lot of textual descriptions

summary(as.numeric(EDH_unnested$`age: years`))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   18.00   30.00   33.66   50.00  200.00   85548
summary(as.numeric(EDH_unnested$`age: months`))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    3.00    6.00    5.91    8.00   30.00   91583
summary(as.numeric(EDH_unnested$`age: days`))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0     7.0    13.0    14.5    20.0   100.0   91819
summary(as.numeric(EDH_unnested$`age: hours`))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       3       4       6       6       8      11   92406

Better method using regular expressions to detect years and converting them as numeric

# For every age field take the first run of digits (so "at least 20" becomes
# 20 and purely textual entries become NA) and store it as a number.
ages <- EDH_unnested %>%
  select(`age: days`, `age: months`, `age: hours`, `age: years`) %>%
  mutate(
    age_years = as.numeric(str_extract(`age: years`, pattern = "[:digit:]+")),
    age_months = as.numeric(str_extract(`age: months`, pattern = "[:digit:]+")),
    age_days = as.numeric(str_extract(`age: days`, pattern = "[:digit:]+")),
    age_hours = as.numeric(str_extract(`age: hours`, pattern = "[:digit:]+"))
  )
summary(ages$age_years)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     1.0    17.0    30.0    32.7    46.0   200.0   84958
summary(ages$age_months)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    3.00    5.00    5.83    8.00   30.00   91546
summary(ages$age_days)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    7.00   13.00   14.34   20.00  100.00   91780
summary(ages$age_hours)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    3.00    4.25    6.00    5.95    7.75   11.00   92405

4.8 Combining all ages (years, months, days, hours) into one column

# Express every age component in years and add them up into `total_age`.
# A missing component counts as 0, so people whose age is recorded only in
# months, days or hours (no year figure) still get a total; `total_age` stays
# NA only when no component at all is recorded. Previously a missing year
# value made the whole sum NA and silently dropped those (infant) ages.
ages <- ages %>%
  mutate(
    total_age = if_else(
      is.na(age_years) & is.na(age_months) & is.na(age_days) & is.na(age_hours),
      NA_real_,
      coalesce(age_years, 0) +
        coalesce(age_months, 0) / 12 +
        coalesce(age_days, 0) / 365 +
        coalesce(age_hours, 0) / (24 * 365)
    )
  )

4.9 Combine the total age with the rest of the dataset

# Add numeric age columns and a combined `total_age` (in years) to the full
# person-level dataset.
# - age_*: first run of digits found in the corresponding text field.
# - total_age: years + months/12 + days/365 + hours/(24*365); a missing
#   component counts as 0, and the total is NA only when no component is
#   recorded. Previously a missing year value made the total NA, which dropped
#   everyone whose age was given only in months, days or hours.
# NOTE: summaries recorded before this change will differ on re-run.
EDH_age <- EDH_unnested %>%
  mutate(
    age_years = as.numeric(str_extract(`age: years`, pattern = "[:digit:]+")),
    age_months = as.numeric(str_extract(`age: months`, pattern = "[:digit:]+")),
    age_days = as.numeric(str_extract(`age: days`, pattern = "[:digit:]+")),
    age_hours = as.numeric(str_extract(`age: hours`, pattern = "[:digit:]+")),
    total_age = if_else(
      is.na(age_years) & is.na(age_months) & is.na(age_days) & is.na(age_hours),
      NA_real_,
      coalesce(age_years, 0) +
        coalesce(age_months, 0) / 12 +
        coalesce(age_days, 0) / 365 +
        coalesce(age_hours, 0) / (24 * 365)
    )
  )

4.10 Summary of age in years

summary(EDH_age$total_age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   17.00   30.00   32.75   46.00  200.00   84958

4.10.1 What percentage of people state their age on inscriptions

# Share (in percent) of people for whom a total age could be computed.
sum(!is.na(EDH_age$total_age)) / (nrow(EDH_age) / 100)
## [1] 8.080972

5 Occupation

5.1 What are the names of unique values in the ‘occupation’ attribute?

# List every distinct value of `occupation` with its frequency.
# The earlier version filtered on "data available" and then called select()
# with no columns, which returns the matching rows without any columns and
# therefore showed no values at all.
EDH_unnested %>%
  count(occupation, sort = TRUE)

Unfortunately, no other details about the occupation are provided.

6 Origin

# One-column data frame of origo values; as.data.frame() names the column
# after the expression it was built from, hence the back-ticked name below.
origo <- as.data.frame(EDH_unnested$origo)
origo
# Frequency of each non-empty origo value, most common first
# (rows with NA are dropped by the comparison as well).
count(
  filter(origo, `EDH_unnested$origo` != ""),
  `EDH_unnested$origo`,
  sort = TRUE
)

6.1 What kind of inscriptions has origo

# Inscription types among people with an origo, most frequent first.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
EDH_unnested %>%
  filter(!is.na(origo)) %>%
  count(type_of_inscription_clean, sort = TRUE)

6.2 What is the findspot vs origo

# Findspot / origo pairs on epitaphs, most frequent first. count() already
# restricts the result to the counted columns, so no select() is needed.
EDH_unnested %>%
  filter(!is.na(origo), type_of_inscription_clean == "epitaph") %>%
  count(findspot_ancient_clean, origo, sort = TRUE)

# All people with a recorded origo.
has_origo <- EDH_unnested %>%
  filter(!is.na(origo))

6.3 Mapping inscriptions with origo

# Split `coordinates` at the comma into longitude / latitude text, remove the
# leftover "c(" and ")" and convert both to numbers.
# NOTE(review): presumably each coordinate is rendered as text like
# "c(lon, lat)" before splitting — confirm against the source data.
# This consumes the `coordinates` column, so it cannot be re-run on the result.
EDH_unnested <- EDH_unnested %>%
  separate(col = coordinates, into = c("longitude", "latitude"), sep = ",") %>%
  mutate(
    latitude = as.numeric(str_replace(latitude, pattern = "\\)", replacement = "")),
    longitude = as.numeric(str_replace(longitude, pattern = "c\\(", replacement = ""))
  )
library(raster)
library(sf)

# Interactive map of the locations of people with an origo statement.
# Only rows with a recorded origo are drawn, so the map matches its legend
# ("Inscriptions with origo statement"); the earlier version plotted every row
# of EDH_unnested.
origo_points <- EDH_unnested %>%
  filter(!is.na(origo))

origo_mapped <- leaflet(width = "100%") %>%
  # Stamen terrain background tiles; other provider layers
  # (e.g. "Stamen.Watercolor", "Esri.WorldTopoMap", "Esri.WorldImagery")
  # can be swapped in here.
  addProviderTiles("Stamen.TerrainBackground") %>%
  addCircles(
    lng = origo_points$longitude,
    lat = origo_points$latitude,
    opacity = 0.1, radius = 2, fill = TRUE, color = "red", fillColor = "red"
  ) %>%
  addLegend(
    position = "bottomright",
    colors = c("Red"),
    labels = c("Inscriptions"), opacity = 1,
    title = "Inscriptions with origo statement"
  ) %>%
  addScaleBar(position = "bottomleft")

origo_mapped

6.4 Heat map of inscriptions with origo

# https://www.supplychaindataanalytics.com/leaflet-heatmaps-in-r/
 
#install.packages("leaflet.extras")
library(leaflet.extras)
library(RColorBrewer)

# Heat map of the locations of people with an origo statement.
# Rows are filtered once, as complete longitude/latitude pairs, and passed to
# addHeatmap() through the piped data. The earlier version applied na.omit()
# to longitude and latitude separately (which can pair values from different
# rows when the missing entries do not coincide) and used all rows rather than
# those with an origo.
origo_heat_points <- EDH_unnested %>%
  filter(!is.na(origo), !is.na(longitude), !is.na(latitude))

heat_origo <- origo_heat_points %>%
  leaflet(width = "100%") %>%
  addTiles() %>%
  addProviderTiles("Stamen.TonerBackground") %>%
  setView(lng = 12.9239625, lat = 41.9515694, zoom = 4) %>%
  addHeatmap(
    lng = ~longitude, lat = ~latitude,
    intensity = 0.1, minOpacity = 0.1,
    radius = 2, blur = 3, gradient = "YlOrRd", cellSize = 1
  )

heat_origo

7 Special focus - Funerary inscriptions

7.1 Age

# People recorded on funerary inscriptions (type "epitaph") only.
epitaph <- EDH_age %>%
  filter(type_of_inscription_clean == "epitaph")

What percentage of people on funerary inscriptions state their age

# Share (in percent) of people on epitaphs for whom a total age is available.
sum(!is.na(epitaph$total_age)) / (nrow(epitaph) / 100)
## [1] 15.51995

7.1.1 What is the age of people on funerary inscriptions by province

# One point per distinct (age, province) combination found on epitaphs;
# provinces are listed top-down in reverse level order.
epitaph %>%
  count(total_age, province_label_clean, sort = TRUE) %>%
  ggplot(aes(x = total_age, y = fct_rev(province_label_clean))) +
  geom_point(alpha = 0.5, color = "darkblue") +
  theme_minimal()

ggsave("../EDH_people/figures/Age_years_provinces.png", width = 8, height = 8)

7.1.2 What is the average age of people on funerary inscriptions by province

# Install psych only when it is missing, instead of re-installing it on every
# run of the analysis.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)

# Descriptive statistics of total age per province, returned as one table
# (mat = TRUE) rounded to two digits.
age_provinces <- describeBy(epitaph$total_age, group = epitaph$province_label_clean, mat = TRUE, digits = 2)
head(age_provinces)

7.1.3 Average age by provinces (All inscriptions)

# Mean age per province, for provinces with at least one age value.
provinces_with_age <- age_provinces %>%
  filter(n > 0)

provinces_with_age %>%
  mutate(group1 = reorder(group1, mean)) %>%
  ggplot(aes(y = group1, x = mean)) +
  # geom_col() has no `stat` argument; bar lengths are the values as given.
  geom_col(color = "white", fill = "blue", width = 0.8) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  labs(
    y = "Roman province", x = "Years",
    title = "Average age of people on inscriptions in the EDH database by province",
    # Computed from the plotted table; the former fixed "n= 7993" was the count
    # of non-missing `age: years` entries in the whole dataset.
    subtitle = paste("n =", sum(provinces_with_age$n), "people")
  ) +
  geom_label(aes(label = mean), colour = "black", fontface = "bold", hjust = -0.1)

ggsave("../figures/Age_average_years_provinces.png", width = 12, height = 12)

7.1.4 Average age by provinces, with more than 100 inscriptions containing age per province

# Mean age per province, restricted to provinces with more than 100 age values.
provinces_over_100 <- age_provinces %>%
  filter(n > 100)

provinces_over_100 %>%
  mutate(group1 = reorder(group1, mean)) %>%
  ggplot(aes(y = group1, x = mean)) +
  # geom_col() has no `stat` argument; bar lengths are the values as given.
  geom_col(color = "white", fill = "blue", width = 0.8) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  labs(
    y = "Roman province ", x = "Years",
    title = "Average age of people on inscriptions in the EDH database by province with more than 100 inscriptions stating age",
    # Computed from the plotted provinces instead of a fixed "n= 7993".
    subtitle = paste("n =", sum(provinces_over_100$n), "people")
  ) +
  geom_label(aes(label = mean), colour = "black", fontface = "bold", hjust = -0.1)

ggsave("../figures/Age_average_years_provinces_100plus_inscr.png", width = 12, height = 12)

7.1.5 Inscriptions with age by provinces

# Number of age values per province, for provinces with at least one.
provinces_age_counts <- age_provinces %>%
  filter(n > 0)

provinces_age_counts %>%
  mutate(group1 = reorder(group1, n)) %>%
  ggplot(aes(y = group1, x = n)) +
  # geom_col() has no `stat` argument; bar lengths are the values as given.
  geom_col(color = "white", fill = "purple", width = 0.8) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  labs(
    y = "Roman province", x = "n",
    title = "Instances of age information in the EDH database by province",
    # Computed from the plotted table instead of a fixed "n= 7993".
    subtitle = paste("n =", sum(provinces_age_counts$n), "people")
  ) +
  geom_label(aes(label = n), colour = "black", fontface = "bold", hjust = -0.4)

ggsave("../figures/Age_info_provinces.png", width = 12, height = 12)

7.1.6 Inscriptions by age groups

# Assign each person with a known total age to an age band.
# The last band uses `>= 60`: with the former `> 60`, a total age of exactly
# 60 matched no condition and ended up as NA.
age_group_levels <- c("0-2.99", "3-14.99", "15-29.99", "30-39.99", "40-59.99", "60 and over")

EDH_age <- EDH_age %>%
  mutate(
    agegroup = case_when(
      total_age < 3 ~ "0-2.99",
      total_age < 15 ~ "3-14.99",
      total_age < 30 ~ "15-29.99",
      total_age < 40 ~ "30-39.99",
      total_age < 60 ~ "40-59.99",
      total_age >= 60 ~ "60 and over"
    ),
    agegroup = factor(agegroup, levels = age_group_levels)
  )

# NOTE(review): the title and file name refer to funerary inscriptions, but
# EDH_age holds people from all inscription types — confirm whether this
# should be limited to epitaphs.
EDH_age %>%
  # Test for missing values directly instead of comparing with the text "NA".
  filter(!is.na(agegroup)) %>%
  ggplot() +
  geom_bar(mapping = aes(x = agegroup, fill = agegroup)) +
  labs(
    x = "Age group (years)",
    y = "Number of instances",
    title = "Representation of age groups on funerary inscriptions (EDH dataset)",
    # labs() expects a plain string here, not the labels object made by ggtitle().
    subtitle = paste("n =", sum(!is.na(EDH_age$agegroup)), "inscriptions")
  )

ggsave("../EDH_people/figures/Age_groups_epitaphs.png", width = 12, height = 8)

7.1.7 Children younger than 10

# Flag each person as younger than 10 ("under10") or not ("over10");
# people without a total age stay NA.
EDH_age <- mutate(
  EDH_age,
  age10 = ifelse(total_age < 10, "under10", "over10")
)

7.1.7.1 Province

# Per province, the proportion of people under 10, 10 or older, and without
# a stated age, drawn as bars stacked to 100 %.
EDH_age %>%
  count(age10, province_label_clean) %>%
  ggplot(aes(x = n, y = province_label_clean, fill = age10)) +
  geom_col(position = "fill") +
  theme_minimal() +
  labs(y = "Roman province", x = "n", title = "Ratio of age children under 10 years on inscriptions per province")

  #geom_text(aes(label= n),hjust = -0.4) 
  #geom_label(aes(label = n), colour = "black", fontface = "bold", hjust = -0.4) 
# Wide table: one row per province, one column per age10 category
# (over10, under10 and NA for people without a stated age).
# Combinations that do not occur are filled with 0 rather than NA; otherwise
# every ratio below becomes NA for a province that lacks one category.
EDH_age10 <- EDH_age %>%
  count(age10, province_label_clean) %>%
  pivot_wider(names_from = age10, values_from = n, values_fill = list(n = 0L))

# Sum all count columns by name instead of by position.
age10_count_cols <- setdiff(names(EDH_age10), "province_label_clean")

# Percentages of each province's people: under 10, 10 or older, and with any
# stated age.
EDH_under10 <- EDH_age10 %>%
  mutate(total = rowSums(EDH_age10[age10_count_cols], na.rm = TRUE)) %>%
  mutate(under10_ratio = under10 / (total / 100)) %>%
  mutate(over10_ratio = over10 / (total / 100)) %>%
  mutate(age_stated = (over10 + under10) / (total / 100))

EDH_under10

7.1.8 Average age by individual years

Using not before and not after date separately.

# Descriptive statistics of total age on epitaphs, grouped by the earliest
# (`not_before`) and the latest (`not_after`) possible year of the inscription.
age_not_before <- describeBy(epitaph$total_age, group = epitaph$not_before, mat = TRUE, digits = 2)

age_not_after <- describeBy(epitaph$total_age, group = epitaph$not_after, mat = TRUE, digits = 2)

7.1.9 When (which years) do people state their age on funerary inscriptions

# Number of stated ages per year, with dotted guide lines at years 1, 100, 200.
# `group1` holds the year as a label; it is converted to a number so that the
# x axis is a real time axis. With the label used directly, the guide lines
# marked the 1st/100th/200th label position rather than those years.
age_not_bf_plot <- age_not_before %>%
  ggplot(aes(x = as.numeric(as.character(group1)), y = n)) +
  geom_point() +
  geom_vline(
    xintercept = c(1, 100, 200), linetype = "dotted",
    color = c("green", "blue", "red"), size = 0.5
  ) +
  labs(x = "Year (not before)")

age_not_aft_plot <- age_not_after %>%
  ggplot(aes(x = as.numeric(as.character(group1)), y = n)) +
  geom_point() +
  geom_vline(
    xintercept = c(1, 100, 200), linetype = "dotted",
    color = c("green", "blue", "red"), size = 0.5
  ) +
  labs(x = "Year (not after)")

Commentary: People state their age in the second century the most!

7.1.10 What is the average age of people on inscriptions in time

# Plot the mean age per year with a smoothed trend and dotted guide lines at
# years 0, 100, 200 and 300.
#
# age_by_year: a describeBy(..., mat = TRUE) table whose `group1` column holds
#   the year and whose `mean` column holds the mean age.
# Returns a ggplot object.
#
# `group1` goes through as.character() before as.numeric(): if it is a factor,
# as.numeric() alone would return level codes instead of the years.
plot_mean_age_by_year <- function(age_by_year) {
  age_by_year %>%
    dplyr::filter(!is.na(mean)) %>%
    mutate(year = as.numeric(as.character(group1))) %>%
    ggplot(aes(x = year, y = mean)) +
    geom_point() +
    geom_smooth() +
    geom_vline(
      xintercept = c(0, 100, 200, 300), linetype = "dotted",
      color = c("green", "blue", "red", "brown"), size = 0.5
    )
}

plot_mean_age_by_year(age_not_before)

plot_mean_age_by_year(age_not_after)

7.2 Gender

# Number of people per gender category on epitaphs.
# Changes from the earlier version:
# - geom_col() was given `fill` twice (a constant and the unrelated object
#   `gender`) plus a `stat` argument it does not have; only the constant
#   orange fill is kept.
# - the `ratio_total` columns were computed but never plotted, so they are gone.
# - the subtitle is computed, matching the percentage plot of the same data.
epitaph %>%
  count(gender, sort = TRUE) %>%
  ggplot(aes(y = gender, x = n)) +
  geom_col(color = "white", fill = "orange", width = 0.8) +
  coord_cartesian(xlim = c(0, 30000)) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  labs(
    y = "Gender category", x = "Number of instances",
    title = "Gender ratio on epitaphs in the EDH database",
    subtitle = paste("n =", nrow(epitaph), "inscriptions")
  ) +
  geom_label(aes(label = n))

ggsave("../EDH_people/figures/Gender_total_epitaphs.png", width = 12, height = 12)
# Percentage of people per gender category on epitaphs, rounded to two
# decimals; categories at or below 0.5 % are left out.
epitaph %>%
  count(gender, sort = TRUE) %>%
  mutate(ratio_total = round(n / (nrow(epitaph) / 100), digits = 2)) %>%
  filter(ratio_total > 0.5) %>%
  ggplot(aes(y = gender, x = ratio_total, fill = gender)) +
  geom_bar(width = 0.8, stat = "identity") +
  coord_cartesian(xlim = c(0, 60)) +
  theme_minimal() +
  theme(text = element_text(size = 16)) +
  labs(
    y = "Gender category", x = "%",
    title = "Gender ratio on epitaphs in the EDH database",
    # labs() expects a plain string here, not the labels object made by ggtitle().
    subtitle = paste("n =", nrow(epitaph), "inscriptions")
  ) +
  geom_label(aes(label = ratio_total), hjust = -0.1)

ggsave("../EDH_people/figures/Gender_epitaphs.png", width = 8, height = 8)
# Split epitaph people into male and female, counting uncertain attributions
# ("M?", "F?") with the respective gender.
epitaph_male <- epitaph %>%
  dplyr::filter(gender %in% c("male", "M?"))

epitaph_female <- epitaph %>%
  dplyr::filter(gender %in% c("female", "F?"))

# Wide one-row table of the two totals ...
gender_true_ratio <- data.frame(
  male = nrow(epitaph_male),
  female = nrow(epitaph_female)
)

# ... and the same numbers in long form (one row per gender) for plotting.
gender_ratio <- data.frame(
  gender = names(gender_true_ratio),
  n = c(gender_true_ratio$male, gender_true_ratio$female),
  stringsAsFactors = FALSE
)

# Absolute numbers of men and women on epitaphs.
gender_ratio %>%
  ggplot(aes(x = gender, y = n)) +
  # geom_col() has no `stat` argument; bar heights are the values as given.
  geom_col(color = "white", fill = "orange", width = 0.5) +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  labs(
    x = "Gender category", y = "Number of people",
    title = "Gender ratio on epitaphs in the EDH database",
    subtitle = "n = 47,803 inscriptions"
  ) +
  geom_label(aes(label = n), colour = "black", fontface = "bold", vjust = 0.5)

ggsave("../EDH_people/figures/Gender_total_ratio_epitaphs.png", width = 12, height = 12)
# Share of men and women (in percent of their combined total) on epitaphs.
gender_ratio %>%
  mutate(ratio = round(n / sum(n) * 100, digits = 2)) %>%
  # `color = gender` used to be passed to ggplot() outside aes(), where it has
  # no effect; it is dropped. geom_col() also has no `stat` argument.
  ggplot(aes(x = gender, y = ratio)) +
  geom_col(width = 0.5, fill = "brown") +
  theme_minimal() +
  theme(text = element_text(size = 14)) +
  labs(
    x = "Gender category", y = "%",
    title = "Gender ratio on epitaphs in the EDH database",
    subtitle = "n = 47,803 inscriptions"
  ) +
  geom_label(aes(label = ratio), colour = "black", fontface = "bold", vjust = 0.5)

ggsave("../EDH_people/figures/Gender_ratio_epitaphs.png", width = 12, height = 12)